'''
Author: momochong0
Date: 2021-05-03 14:28:02
LastEditors: momochong0
LastEditTime: 2021-05-09 19:14:20
Description: 要推网络荣誉出品
'''

import requests
import re
import time
from concurrent.futures import ThreadPoolExecutor


def download_one_page(url):
    """Fetch ``url`` and print the comment fields found in the response body.

    The response text is scanned with regular expressions for the
    ``nickname``, ``creationTime``, ``referenceName`` and ``content`` JSON
    fields. One line is printed per record, in the form
    ``index nickname creationTime referenceName content``.

    Args:
        url: Address of the page to download.

    Returns:
        None. The extracted records are written to standard output.

    Raises:
        requests.exceptions.RequestException: If the request fails or does
            not complete within the timeout.
    """
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36"
    }
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Decode the body once instead of re-reading ``.text`` for every pattern.
    text = response.text

    # Extract the key fields with regular expressions.
    # User name:
    nicknames = re.findall(r'","nickname":"(.*?)","', text)
    # Creation time:
    creation_times = re.findall(r'","creationTime":"(.*?)","', text)
    # Product name:
    reference_names = re.findall(r'","referenceName":"(.*?)","', text)
    # Comment text:
    comments = re.findall(r'","content":"(.*?)","', text)

    # Pair the fields up positionally. ``zip`` stops at the shortest list, so
    # a field that matched fewer times no longer triggers an IndexError.
    records = zip(nicknames, creation_times, reference_names, comments)
    for index, record in enumerate(records):
        print(index, *record)


if __name__ == '__main__':
    # Product page whose comments are downloaded.
    jdurl = 'https://item.jd.com/10024805341688.html'
    # The product ID is the run of digits directly after ".com/" in the URL.
    # The dot is escaped and at least one digit is required, so an unexpected
    # URL yields no match instead of an empty ID.
    match = re.search(r'(?<=\.com/)\d+', jdurl)
    if match is None:
        raise SystemExit(f'Could not extract a product ID from {jdurl!r}')
    pid = match.group()

    with ThreadPoolExecutor(5) as t:
        pending = []
        for page in range(1, 100):
            # Pause between submissions to throttle the request rate.
            time.sleep(3)
            url = f'https://club.jd.com/comment/productPageComments.action?productId={pid}&score=0&sortType=5&page={page}&pageSize=10&isShadowSku=0&rid=0&fold=1'
            # Pass the callable and its argument separately so the download
            # runs inside a worker thread. Calling the function here would run
            # it in the main thread and submit its ``None`` result instead.
            pending.append((page, t.submit(download_one_page, url)))

        # Exceptions raised in worker threads are stored on the future, so
        # report them explicitly rather than letting them vanish.
        for page, future in pending:
            error = future.exception()
            if error is not None:
                print(f'page {page} failed: {error!r}')
